###########################
library(tidyverse)
library(dplyr)
library(janitor)

# Run this script in a fresh R session. The workspace is deliberately not
# cleared with `rm(list = ls())`: that call deletes the caller's objects when
# the script is sourced, yet leaves loaded packages and options untouched, so
# it does not provide a clean state anyway.

###########################

# Input data: one row per text. The steps below use at least the columns
# `id` and `university`.
texts <- readxl::read_excel("replicable_data/texts.xlsx")

###########################

# Network
#
# Keep only the rows whose `id` occurs more than once in `texts`, reduce them
# to `id` and `university`, and label every remaining `id` with a numeric
# group identifier (`id_n`).

network <- texts %>%
  add_count(id) %>%
  filter(n > 1) %>%
  select(id, university) %>%
  group_by(id) %>%
  mutate(id_n = cur_group_id()) %>%
  # Drop the grouping so later summaries of `network` are not silently
  # computed per `id`.
  ungroup()


# Edges: every pair of universities that share an `id_n`.
# A data.table copy is used so that `network` itself is not modified by
# reference. The unnamed list returned in `j` yields the columns V1 and V2.
edges <- data.table::as.data.table(network)[, {
  pair_matrix <- combn(university, 2)
  list(pair_matrix[1, ], pair_matrix[2, ])
}, by = id_n]

# The graph built from these edges is undirected, so "A-B" and "B-A" are the
# same tie. Each pair is put into a canonical order before counting; otherwise
# one tie would be split into two parallel edges with under-counted weights.
# Counting on the two columns (instead of a pasted "A-B" key) also avoids
# collisions when a university name itself contains "-".
# NOTE(review): an `id` that lists the same university twice still produces a
# self-pair (A, A) -- confirm whether such loops are wanted.
edges <- edges %>%
  as_tibble() %>%
  transmute(
    university1 = pmin(V1, V2),
    university2 = pmax(V1, V2)
  ) %>%
  count(university1, university2, name = "n") %>%
  mutate(n_10 = n / 10)


# Nodes: number of rows each university contributes to `network`.
# `ungroup()` makes the count independent of any grouping left on `network`.
nodes <- network %>%
  ungroup() %>%
  count(university, name = "freq") %>%
  mutate(freq_10 = freq / 10)

###########################

# Network estimation
#
# Build an undirected graph: each row of `edges` is a tie and each row of
# `nodes` is a vertex. Extra columns are passed along with the data frames.

ig <- igraph::graph_from_data_frame(
  edges,
  directed = FALSE,
  vertices = nodes
)

###########################
 
# Network metrics

# Graph-level: mean distance between vertices.
igraph::mean_distance(ig)

# Vertex-level centrality scores (normalised where the function supports it).
igraph::degree(ig, normalized = TRUE, mode = "total")
igraph::authority.score(ig)[["vector"]]
igraph::betweenness(ig, normalized = TRUE)
igraph::closeness(ig, normalized = TRUE)


# Network plot
#
# Vertex size and edge width are read from the attributes stored on `ig`
# (`freq_10` on vertices, `n_10` on edges), so they always follow the graph's
# own vertex and edge order. `normalized` is an argument of the centrality
# functions, not a plotting parameter, so it is not passed here.

plot(
  ig,
  vertex.size = igraph::V(ig)$freq_10,
  edge.width = igraph::E(ig)$n_10,
  vertex.label.cex = 1.5,
  edge.color = "red"
)

